Resolving geographic units that do not neatly coincide is a common problem in spatial data analysis. This method attempts to conflate King County Health Reporting Areas (HRAs) to US Census tracts. In the cases where a given tract with entirely within a HRA, that tract receives the HRA’s ID. Where a given tract overlaps multiple HRAs block-level census data is used to determine which HRA ID to assign to the tract.
This method provides three alternatives of block-level counts that can be used:
if(!file.exists(root_file('1-data/4-interim/kc-blk-sp.gpkg'))){
pop <- read_csv(root_file('1-data/3-external/manual/wa-blk/DEC_10_SF1_P1/DEC_10_SF1_P1_with_ann.csv'),
col_types = cols(Id2 = col_character()),
skip = 1) %>%
mutate(GEO_ID_BLK = Id2,
GEOID_TR = substr(Id2,start = 1,stop = 11),
POP = Total) %>%
select(GEO_ID_BLK,GEOID_TR,POP)
hu <- read_csv(root_file('1-data/3-external/manual/wa-blk/DEC_10_SF1_H1/DEC_10_SF1_H1_with_ann.csv'),
col_types = cols(Id2 = col_character()),
skip = 1) %>%
mutate(GEO_ID_BLK = Id2,
HU = Total) %>%
select(GEO_ID_BLK,HU)
pophu <- read_csv(root_file('1-data/3-external/manual/wa-blk/DEC_10_SF1_H10/DEC_10_SF1_H10_with_ann.csv'),
col_types = cols(Id2 = col_character()),
skip = 1) %>%
mutate(GEO_ID_BLK = Id2,
POPHU = Total) %>%
select(GEO_ID_BLK,POPHU)
cnts <- left_join(pop,hu,by = 'GEO_ID_BLK') %>%
left_join(pophu,by = 'GEO_ID_BLK')
# Source for KC Blocks spatial data
if(!file.exists(root_file('1-data/3-external/wa-blk/blocks10/blocks10.shp'))){
url <- 'ftp://ftp.kingcounty.gov/gis-web/web/GISData/blocks10_SHP.zip' # direct URL to the file download
temp <- tempfile() # create a temporary file to hold the compressed download
download(url, dest = temp, mode='wb') # download the file
unzip (temp, exdir = root_file('1-data/3-external/wa-blk')) # extract the file to the project folder
}
kc_blk <- readOGR(dsn = root_file('1-data/3-external/wa-blk/blocks10/'),
layer = 'blocks10',
verbose = FALSE,
stringsAsFactors = FALSE)
kc_blk@data %<>% left_join(cnts, by = 'GEO_ID_BLK') %>%
select(GEOID_BLK = GEO_ID_BLK,
GEOID_TR:POPHU)
kc_blk %>% writeOGR(dsn = root_file('1-data/4-interim/kc-blk-sp.gpkg'),
layer = 'kc_blk_sp',
driver = 'GPKG',
overwrite_layer = TRUE,verbose = FALSE)
}
kc_blk_sp <- readOGR(dsn = root_file('1-data/4-interim/kc-blk-sp.gpkg'),
layer = 'kc_blk_sp',
stringsAsFactors = FALSE,
verbose = FALSE)
The following actions are performed in this method:
class = SpatialPointsDataFrame)sp::over())POP,HU,POPHU) are summed# Pass HRA IDs to the block centroids
hra <- readOGR(dsn = root_file('1-data/3-external/manual/HRA_2010Block_Clip/'),layer = 'HRA_2010Block_Clip',
verbose = FALSE,stringsAsFactors = FALSE)
kc_blk_cnt_sp <- SpatialPointsDataFrame(coords = rgeos::gCentroid(kc_blk_sp,byid = TRUE),
data = as.data.frame(kc_blk_sp@data),
match.ID = FALSE)
kc_blk_cnt_sp$HRA_ID <- sp::over(kc_blk_cnt_sp,hra[,'HRA2010v2_']) %>% unlist
kc_blk_cnt_sp@data %<>% mutate(HRA_ID = ifelse(is.na(HRA_ID),'None',HRA_ID))
# Assign HRAs to tracts
if(!file.exists(root_file('1-data/4-interim/kc-hra-tr-sp.gpkg'))){
if(!file.exists(root_file('1-data/3-external/kc-tr/tracts10_shore/tracts10_shore.shp'))){
url <- 'ftp://ftp.kingcounty.gov/gis-web/web/GISData/tracts10_shore_SHP.zip' # direct URL to the file download
temp <- tempfile() # create a temporary file to hold the compressed download
download(url, dest = temp, mode='wb') # download the file
unzip (temp, exdir = root_file('1-data/3-external/kc-tr')) # extract the file to the project folder
}
kc_tr_sp <- readOGR(dsn = root_file('1-data/3-external/kc-tr/tracts10_shore/'),
layer = 'tracts10_shore',
verbose = FALSE,
stringsAsFactors = FALSE)
kc_tr_sp@data %<>% select(GEOID_TR = GEO_ID_TRT)
first_notNA <- function(x){first(x[!is.na(x)])}
tr_hra_ids <-
kc_blk_cnt_sp@data %>%
as.data.frame() %>%
gather('VAR','COUNT',POP:POPHU) %>%
group_by(GEOID_TR,VAR,HRA_ID) %>%
summarise(SUM = sum(COUNT)) %>%
arrange(desc(SUM)) %>%
slice(1) %>%
spread(VAR,HRA_ID) %>%
mutate(HU_CNT = ifelse(!is.na(HU),SUM,NA_integer_),
POP_CNT = ifelse(!is.na(POP),SUM,NA_integer_),
POPHU_CNT = ifelse(!is.na(POPHU),SUM,NA_integer_)) %>%
select(-SUM) %>%
group_by(GEOID_TR) %>%
summarise_all(funs(first_notNA)) %>%
ungroup() %>%
select(GEOID_TR,
POP = POP_CNT,
HU = HU_CNT,
POPHU = POPHU_CNT,
HRA_POP = POP,
HRA_HU = HU,
HRA_POPHU = POPHU)
tr_hra_ids2 <-
tr_hra_ids %>%
select(GEOID_TR,matches('HRA')) %>%
gather("VAR","HRA",matches('HRA')) %>%
group_by(GEOID_TR) %>%
summarise(ALLEQ = length(unique(HRA))==1) %>%
left_join(tr_hra_ids,.,by = 'GEOID_TR')
# Get the percentage for each count variable
pop_tr <-
read_csv(root_file("1-data/3-external/manual/kc-tr/DEC_10_SF1_P1/DEC_10_SF1_P1_with_ann.csv"),
col_types = cols(GEO.id2 = col_character(),
Geography = col_skip(),
Id = col_skip(),
Id2 = col_character()),
skip = 1) %>%
select(GEOID_TR = Id2,
POP_TR = Total)
hu_tr <-
read_csv(root_file('1-data/3-external/manual/kc-tr/DEC_10_SF1_H1/DEC_10_SF1_H1_with_ann.csv'),
col_types = cols(GEO.id2 = col_character(),
Geography = col_skip(),
Id = col_skip(),
Id2 = col_character()),
skip = 1)%>%
select(GEOID_TR = Id2,
HU_TR = Total)
pophu_tr <-
read_csv(root_file('1-data/3-external/manual/kc-tr/DEC_10_SF1_H10/DEC_10_SF1_H10_with_ann.csv'),
col_types = cols(GEO.id2 = col_character(),
Geography = col_skip(),
Id = col_skip(),
Id2 = col_character()),
skip = 1) %>%
select(GEOID_TR = Id2,
POPHU_TR = Total)
kc_tr_sp@data %<>%
left_join(.,tr_hra_ids2,by = 'GEOID_TR') %>%
left_join(.,pop_tr,by = 'GEOID_TR') %>%
left_join(.,hu_tr,by = 'GEOID_TR') %>%
left_join(.,pophu_tr,by = 'GEOID_TR') %>%
mutate(POP_PCT = round_any(POP/POP_TR,accuracy = .01),
HU_PCT = round_any(HU/HU_TR,accuracy = .01),
POPHU_PCT = round_any(POPHU/POPHU_TR,accuracy = .01)) %>%
select(GEOID_TR,
POP,POP_PCT,
HU, HU_PCT,
POPHU, POPHU_PCT,everything())
kc_tr_sp %>%
writeOGR(dsn = root_file('1-data/4-interim/kc-hra-tr-sp.gpkg'),
layer = 'kc_hra_tr_sp',
driver = 'GPKG',
verbose = FALSE,
overwrite_layer = TRUE)
}
kc_hra_tr_sp <-
readOGR(dsn = root_file('1-data/4-interim/kc-hra-tr-sp.gpkg'),
layer = 'kc_hra_tr_sp',
verbose = FALSE) %>%
spTransform(crs_proj)
After running the assignment algorithm, it is clear that the POP and POPHU variables result in the same HRA assignments. HU differs from the other two variables in three of the tracts:
| GEOID_TR | HRA_POP | HRA_POPHU | HRA_HU |
|---|---|---|---|
| 53033022202 | Kirkland North | Kirkland North | Kirkland |
| 53033025001 | Bellevue-South | Bellevue-South | Newcastle/Four Creeks |
| 53033028801 | SeaTac/Tukwila | SeaTac/Tukwila | Des Moines/Normandy Park |
mypal <- RColorBrewer::brewer.pal(8,name = 'Set2')[-8]
shuffled_hra_pop <- forcats::fct_shuffle(kc_hra_tr_sp$HRA_POP) %>% factor(ordered = T)
shuffled_hra_hu <- forcats::fct_shuffle(kc_hra_tr_sp$HRA_HU) %>% factor(ordered = T)
shuffled_hra_pophu <- forcats::fct_shuffle(kc_hra_tr_sp$HRA_POPHU) %>% factor(ordered = T)
pal_pop <- colorFactor(palette = mypal,domain = shuffled_hra_pop)
pal_hu <- colorFactor(palette = mypal,domain = shuffled_hra_hu)
pal_pophu <- colorFactor(palette = mypal,domain = shuffled_hra_pophu)
hra %<>% spTransform(crs_proj)
show_hra_tr_pop <- function(){
myLfltGrey(bumpLabels = FALSE,hideControls = FALSE) %>%
addProviderTiles(providers$CartoDB) %>%
addPolygons(data = kc_hra_tr_sp,
smoothFactor = 0,
weight = 1,
color = col2hex("white"),
opacity = .85,
fillColor = ~pal_pop(shuffled_hra_pop),
fillOpacity = .5,
group = 'HRA Tract by Pop.',
popup = ~paste0(kc_hra_tr_sp$GEOID_TR)) %>%
addPolygons(data = hra,
smoothFactor = 0,
fillOpacity = 0,
weight = 2,
color = ~pal_pop(factor(hra$HRA2010v2_,levels = levels(shuffled_hra_pop),ordered = TRUE)),
opacity = 1,
group = 'HRAs',
popup = ~paste0(hra$HRA2010v2_)) %>%
addLayersControl(overlayGroups = c('HRA Tract by Pop.',
'HRAs'),
position = 'topright',options = layersControlOptions(FALSE))
}
show_hra_tr_hu <- function(){
myLfltGrey(bumpLabels = FALSE,hideControls = FALSE) %>%
addProviderTiles(providers$CartoDB) %>%
addPolygons(data = kc_hra_tr_sp,
smoothFactor = 0,
weight = 1,
color = col2hex("white"),
opacity = .85,
fillColor = ~pal_hu(shuffled_hra_hu),
fillOpacity = .5,
group = 'HRA Tract by Pop.',
popup = ~paste0(kc_hra_tr_sp$GEOID_TR)) %>%
addPolygons(data = hra,
smoothFactor = 0,
fillOpacity = 0,
weight = 2,
color = ~pal_hu(factor(hra$HRA2010v2_,levels = levels(shuffled_hra_hu),ordered = TRUE)),
opacity = 1,
group = 'HRAs',
popup = ~paste0(hra$HRA2010v2_)) %>%
addLayersControl(overlayGroups = c('HRA Tract by Pop.',
'HRAs'),
position = 'topright',options = layersControlOptions(FALSE))
}
show_hra_tr_pophu <- function(){
myLfltGrey(bumpLabels = FALSE,hideControls = FALSE) %>%
addProviderTiles(providers$CartoDB) %>%
addPolygons(data = kc_hra_tr_sp,
smoothFactor = 0,
weight = 1,
color = col2hex("white"),
opacity = .85,
fillColor = ~pal_pophu(shuffled_hra_pophu),
fillOpacity = .5,
group = 'HRA Tract by Pop.',
popup = ~paste0(kc_hra_tr_sp$GEOID_TR)) %>%
addPolygons(data = hra,
smoothFactor = 0,
fillOpacity = 0,
weight = 2,
color = ~pal_pophu(factor(hra$HRA2010v2_,levels = levels(shuffled_hra_pophu),ordered = TRUE)),
opacity = 1,
group = 'HRAs',
popup = ~paste0(hra$HRA2010v2_)) %>%
addLayersControl(overlayGroups = c('HRA Tract by Pop.',
'HRAs'),
position = 'topright',options = layersControlOptions(FALSE))
}
# Save the maps as HTML documents
if(!file.exists(root_file('3-communication/others/html/hra-tracts-pop.html'))){
show_hra_tr_pop() %>%
saveWidget(file = root_file('3-communication/others/html/hra-tracts-pop.html'),
selfcontained = FALSE,
libdir = root_file('3-communication/others/html/html_support_files'))
}
if(!file.exists(root_file('3-communication/others/html/hra-tracts-hu.html'))){
show_hra_tr_pop() %>%
saveWidget(file = root_file('3-communication/others/html/hra-tracts-hu.html'),
selfcontained = FALSE,
libdir = root_file('3-communication/others/html/html_support_files'))
}
if(!file.exists(root_file('3-communication/others/html/hra-tracts-pophu.html'))){
show_hra_tr_pop() %>%
saveWidget(file = root_file('3-communication/others/html/hra-tracts-pophu.html'),
selfcontained = FALSE,
libdir = root_file('3-communication/others/html/html_support_files'))
}
show_hra_tr_pop()